{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## A simple regression training using LightGBM through Fairing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "from time import gmtime, strftime\n",
    "from kubeflow import fairing\n",
    "from kubeflow.fairing.frameworks import lightgbm\n",
    "\n",
    "# Google Container Registry (GCR) is used here for storing the output containers.\n",
    "# You can use any docker container registry instead of GCR.\n",
    "# The project name returned by fairing is embedded in the registry path below.\n",
    "GCP_PROJECT = fairing.cloud.gcp.guess_project_name()\n",
    "DOCKER_REGISTRY = 'gcr.io/{}/fairing-job'.format(GCP_PROJECT)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Set up Docker credentials for AppendBuilder"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import subprocess\n",
    "\n",
    "# Configure docker credentials through gcloud; check_call raises if the command fails.\n",
    "subprocess.check_call(['gcloud', 'auth', 'configure-docker', '--quiet'])\n",
    "\n",
    "# When GOOGLE_APPLICATION_CREDENTIALS is set, also activate the service account,\n",
    "# passing the variable's value as the key file.\n",
    "if os.environ.get('GOOGLE_APPLICATION_CREDENTIALS'):\n",
    "    subprocess.check_call([\n",
    "        'gcloud', 'auth', 'activate-service-account',\n",
    "        '--key-file=' + os.environ['GOOGLE_APPLICATION_CREDENTIALS'],\n",
    "        '--quiet',\n",
    "    ])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Launch a LightGBM train task"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a bucket for copying the trained model, unless it already exists.\n",
    "# You can set the gcs_bucket variable to an existing bucket name if that is desired.\n",
    "gcs_bucket = \"gs://{}-fairing\".format(GCP_PROJECT)\n",
    "# `gsutil mb` reports an error for a bucket that already exists, so look the bucket up\n",
    "# first and only create it when the lookup fails; this keeps re-runs of the cell clean.\n",
    "!gsutil ls -b {gcs_bucket} > /dev/null 2>&1 || gsutil mb {gcs_bucket}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Configuration passed to LightGBM for the training run.\n",
    "params = {\n",
    "    'task': 'train',\n",
    "    'boosting_type': 'gbdt',\n",
    "    'objective': 'regression',\n",
    "    'metric': 'l2',\n",
    "    'metric_freq': 1,\n",
    "    'num_leaves': 31,\n",
    "    'learning_rate': 0.05,\n",
    "    'feature_fraction': 0.9,\n",
    "    'bagging_fraction': 0.8,\n",
    "    'bagging_freq': 5,\n",
    "    'n_estimators': 10,\n",
    "    'is_training_metric': 'true',\n",
    "    # Validation and training data sets, read from GCS.\n",
    "    'valid_data': 'gs://fairing-lightgbm/regression-example/regression.test',\n",
    "    'train_data': 'gs://fairing-lightgbm/regression-example/regression.train',\n",
    "    'verbose': 1,\n",
    "    'verbose_eval': 1,\n",
    "    # The model is written into gcs_bucket; its file name carries the current UTC time.\n",
    "    'model_output': '{}/lightgbm/example/model_{}.txt'.format(\n",
    "        gcs_bucket, strftime('%Y_%m_%d_%H_%M_%S', gmtime())),\n",
    "    # Train on 3 machines with the 'feature' tree learner.\n",
    "    'num_machines': 3,\n",
    "    'tree_learner': 'feature',\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "# Launch the LightGBM training task with the `params` configuration.\n",
    "lightgbm.execute(\n",
    "    config=params,\n",
    "    docker_registry=DOCKER_REGISTRY,\n",
    "    cores_per_worker=2,     # Allocating 2 CPU cores per worker instance\n",
    "    memory_per_worker=0.5,  # Allocating 0.5GB of memory per worker instance\n",
    "    stream_log=True,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Let's look at the trained model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Copy the trained model from GCS into /tmp and print its first lines.\n",
    "model_url = params['model_output']\n",
    "model_name = os.path.basename(model_url)\n",
    "!gsutil cp {model_url} /tmp/{model_name}\n",
    "!head /tmp/{model_name}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Running a prediction task using the trained model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Derive the model file name from the training config in this cell, so prediction\n",
    "# does not depend on variables set by the optional model-inspection cell.\n",
    "# The \".txt\" extension is dropped so the result file does not end in \".txt.txt\".\n",
    "trained_model_stem = os.path.splitext(os.path.basename(params['model_output']))[0]\n",
    "\n",
    "# Configuration for the prediction task: score the test data with the trained model\n",
    "# and write the result next to it in gcs_bucket.\n",
    "predict_params = {\n",
    "    \"task\": \"predict\",\n",
    "    'metric': 'l2',\n",
    "    \"data\": \"gs://fairing-lightgbm/regression-example/regression.test\",\n",
    "    \"input_model\": params['model_output'],\n",
    "    \"output_result\": \"{}/lightgbm/example/prediction_result_{}.txt\".format(gcs_bucket, trained_model_stem)\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Launch the prediction task with the `predict_params` configuration.\n",
    "lightgbm.execute(\n",
    "    config=predict_params,\n",
    "    docker_registry=DOCKER_REGISTRY,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Copy the prediction result from GCS into /tmp under its own file name.\n",
    "prediction_url = predict_params['output_result']\n",
    "file_name = os.path.basename(prediction_url)\n",
    "!gsutil cp {prediction_url} /tmp/{file_name}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# Read the downloaded result file without a header row, so its first column is labelled 0.\n",
    "predictions = pd.read_csv(\"/tmp/{}\".format(file_name), header=None)\n",
    "print(\n",
    "    \"Prediction mean: {}, count: {}\".format(\n",
    "        predictions.mean()[0],   # mean of column 0\n",
    "        predictions.count()[0],  # non-null entries in column 0\n",
    "    )\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
